This document presents the subset of figures used in the paper about monitoring.

Data and packages

library(dplyr)
library(tidyr)
library(ggplot2)
library(ggmap)
library(scatterpie)
library(rgdal)
library(multcompView)
library(car)
library(ggpmisc)
library(chisq.posthoc.test)
library(vcd)

Sampling data correspond to the data collected with kobo and previously cleaned with the script 1_preprocesamiento_datos_kobo.Rmd.

# Read the two cleaned Kobo exports: one row per sampled tree (muestreo)
# and one row per plot (parcelas).
muestreo_tidy <- read.delim(
  file = "../data/kobo/muestreo_dic2020_tidy.txt",
  header = TRUE
)
parcelas_tidy <- read.delim(
  file = "../data/kobo/parcelas_dic2020_tidy.txt",
  header = TRUE
)

# Reshape the plot table to long format: the health-category columns
# (healthy ... worm) become one `tree_health_simplified` column, with the
# tree counts stored in `n_trees`.
parcelas_long <- parcelas_tidy %>%
  pivot_longer(
    cols = healthy:worm,
    names_to = "tree_health_simplified",
    values_to = "n_trees"
  )

Data analyzed here correspond only to the trees that were approved during the validation by manually reviewing the photographs in kobotoolbox. Of a total of 1778 trees sampled, 1765 were approved in the validation.

# Keep only the tree records whose validation status is "approved".
muestreo_tidy <- muestreo_tidy %>%
  filter(X_validation_status == "validation_status_approved")

Color palettes:

# Colour palette and legend order shared by all health-status plots.
# The three vectors below are parallel: element i of `my_cols` is the colour
# for category i of `desired_order`, shown in legends as element i of
# `desired_names`. The "insect" category ("burlywood4") is currently disabled.

my_cols <- c(
  "darkgreen", "darkred", "orangered1", "cadetblue", "tan",
  "beige",
  # "burlywood4",
  "coral", "aquamarine3", "gray70", "black"
)

# Category codes as they appear in the data
desired_order <- c(
  "healthy", "ozone", "ozone_and_other", "others_combined", "drougth",
  "fungi",
  # "insect",
  "worm", "acid_rain", "other", "dead"
)

# Human-readable legend labels for the codes above
desired_names <- c(
  "healthy", "ozone", "ozone and other", "others combined", "drougth",
  "fungi",
  # "insect",
  "worm", "acid rain", "other", "dead"
)

# Palette and class order for the ozone damage percentage plots
my_cols2 <- c(
  "darkgreen", "gold2", "chocolate1",
  "orangered", "red4", "darkorchid4"
)

desired_order_percentage <- c(
  "0%", "less than 10%", "10 to 40%",
  "40 to 50%", "50 to 70%", "more than 70%"
)

Multiplot fun:

# Multiple plot function
#
# Prints several plot objects on a single page using a grid layout.
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - ...:      Plot objects to draw, in order
# - plotlist: Optional list of further plot objects, appended after `...`
# - file:     Not used by the function; kept so existing calls keep working
# - cols:     Number of columns in layout
# - layout:   A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
# With no plots at all the function returns invisible(NULL) without touching
# the graphics device.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots <- length(plots)

  # Nothing to draw: stop here instead of indexing into an empty list
  if (numPlots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    numRows <- ceiling(numPlots / cols)
    layout <- matrix(seq_len(cols * numRows),
                     ncol = cols, nrow = numRows)
  }

  if (numPlots == 1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in seq_len(numPlots)) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

Configure google api for maps:

# code adapted from https://rgraphgallery.blogspot.com/2013/04/rg-plot-pie-over-g0ogle-map.html

## configure google api

# You first need to register your api key in https://cloud.google.com/maps-platform/#get-started and follow instructions. The geocoding API is a free service, but you nevertheless need to associate a credit card with the account. Please note that the Google Maps API is not a free service. There is a free allowance of 40,000 calls to the geocoding API per month, and beyond that calls are $0.005 each.
# after you obtain your api, save it in /scripts/api_key.api (not shown in this repo for obvious reasons).

# if you get the following error when running get_map():

#"Error in aperm.default(map, c(2, 1, 3)) : 
#  invalid first argument, must be an array " 

# check this troubleshooting: https://rgraphgallery.blogspot.com/2013/04/rg-plot-pie-over-google-map.html

##  load and register api
# Read only the first line and trim it, so a trailing blank line, a missing
# final newline or stray whitespace in the key file cannot corrupt the key.
api <- trimws(readLines("api_key.api", n = 1L, warn = FALSE))
if (length(api) != 1L || !nzchar(api)) {
  stop("api_key.api must contain the Google API key on its first line",
       call. = FALSE)
}
register_google(key = api)

Map and monitoring figures presented in the paper:

Figure 2

Plot 2a: PNDL location on CDMX map

# get cdmx shape
CDMX<-readOGR(dsn="../data/spatial", layer="CDMX")
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/veronicareyesgalindo/Documents/GitHub/monitoreo-oyameles/data/spatial", layer: "CDMX"
## with 1 features
## It has 8 fields
# fortify() turns the spatial object into a data frame with long/lat/group
# columns, which geom_polygon() needs
CDMX<-fortify(CDMX)

# get PNDL shape
PNDL<-readOGR(dsn="../data/spatial", layer="Desierto_Leones_Geo_ITRF08")
## Warning in OGRSpatialRef(dsn, layer, morphFromESRI = morphFromESRI, dumpSRS =
## dumpSRS, : Discarded datum International_Terrestrial_Reference_Frame_2008 in
## Proj4 definition: +proj=longlat +ellps=GRS80 +no_defs
## OGR data source with driver: ESRI Shapefile 
## Source: "/Users/veronicareyesgalindo/Documents/GitHub/monitoreo-oyameles/data/spatial", layer: "Desierto_Leones_Geo_ITRF08"
## with 1 features
## It has 14 fields
PNDL<-fortify(PNDL)

# get background map
# NOTE(review): "terrain-background" is paired with source = "google" here;
# confirm that this map type is honoured by the Google source.
sat_map <- get_map(location = c(lon = -99.133549, lat = 19.3), zoom = 10,
                   maptype = "terrain-background", source = "google")

## plot
# The hand-made legend (two squares plus their labels) is added with
# annotate(): each marker is a single fixed-position object and must not
# inherit the data of the base ggmap() plot, which is what happens when
# constants are wrapped in geom_*(aes(...)).
p_a <- ggmap(sat_map) +
  geom_polygon(data = CDMX,
               aes(x = long, y = lat, group = group),
               color = "black", fill = NA, size = 1.5) +
  geom_polygon(data = PNDL,
               aes(x = long, y = lat, group = group),
               color = "red", fill = NA, size = 1.5) +
  annotate("point", x = -98.95, y = 19.6,
           shape = 0, stroke = 2, size = 5, colour = "black") +
  annotate("point", x = -98.95, y = 19.55,
           shape = 0, stroke = 2, size = 5, colour = "red") +
  annotate("text", label = "CDMX", x = -98.87, y = 19.6,
           colour = "black", fontface = "bold", size = 5) +
  annotate("text", label = "PNDL", x = -98.87, y = 19.55,
           colour = "black", fontface = "bold", size = 5) +
  theme(text = element_text(size = 20)) +
  ggtitle("a)")

Plot 2b: Satellite image and surroundings of the PNDL

# get background map
sat_map <- get_map(location = c(lon = -99.30, lat = 19.31), zoom = 13,
                   maptype = "satellite", source = "google")

## add towns names
towns <- data.frame(nombre = c("San Bartolo Ameyalco",
                               "Santa Rosa Xochiac",
                               "San Mateo Tlaltenango"),
                    long = c(-99.270, -99.29, -99.276),
                    lat = c(19.333, 19.325, 19.346))

## plot
# PNDL outline plus the three towns (red dot with a white label nudged north).
p_b <- ggmap(sat_map) +
  geom_polygon(data = PNDL,
               aes(x = long, y = lat, group = group),
               color = "red", fill = NA, size = 1.5) +
  geom_point(data = towns, aes(x = long, y = lat),
             colour = "red", size = 1.5) +
  geom_text(data = towns, aes(label = nombre, x = long, y = lat),
            color = "white", fontface = "bold",
            size = 5, nudge_y = 0.003) +
  # Cruz de Coloxtitla (drawn as "X") and Convento (drawn as "C") landmarks.
  # annotate() is used because each label is a single fixed-position object
  # and must not inherit the data of the base ggmap() plot.
  annotate("text", label = "X", x = -99.3014, y = 19.286068,
           colour = "white", fontface = "bold", size = 4) +
  annotate("text", label = "C", x = -99.31, y = 19.3133,
           colour = "white", fontface = "bold", size = 4) +
  theme(text = element_text(size = 20)) +
  ggtitle("b)")

Plot 2c: This is the distribution of the 48 plots:

## plot map
# Satellite background for the sampled area
sat_map <- get_map(
  location = c(lon = -99.3060, lat = 19.2909),
  zoom = 14,
  maptype = "satellite",
  source = "google"
)

# One red dot per sampled plot, labelled with the plot identifier.
# Labels that would overlap an earlier label are skipped (check_overlap).
p_c <- ggmap(sat_map) +
  geom_point(
    data = parcelas_tidy,
    mapping = aes(x = X_coordinates_longitude, y = X_coordinates_latitude),
    color = "red"
  ) +
  geom_text(
    data = parcelas_tidy,
    mapping = aes(
      x = X_coordinates_longitude,
      y = X_coordinates_latitude,
      label = plot
    ),
    color = "white",
    size = 5,
    hjust = 0, vjust = 1, nudge_x = 0.0005,
    check_overlap = TRUE
  ) +
  theme(text = element_text(size = 20)) +
  ggtitle("c)")

Plot 2d: Distribution of tree health status by plot

The following figure shows the total number of trees sampled in each 10x10 m plot, and how many of these are under some category of damage:

# Stacked bars: one bar per plot, bar height = number of trees, segments
# coloured by health category using the shared palette and legend order.
p_d <- ggplot(
  data = parcelas_long,
  mapping = aes(x = plot, y = n_trees, fill = tree_health_simplified)
) +
  geom_col() +
  scale_fill_manual(
    name = "Health status",
    values = my_cols,
    breaks = desired_order,
    labels = desired_names
  ) +
  theme_bw() +
  labs(x = "Plots", y = "Number of trees") +
  theme(text = element_text(size = 20)) +
  ggtitle("d)")

Multiplot

# Two-column page, filled column by column: a) and c) on the left,
# b) and d) on the right.
multiplot(plotlist = list(p_a, p_c, p_b, p_d), cols = 2)

Figure 3

Figure 3a and b: Reforested

# Health status and reforestation flag of every tree, restricted to the
# three health classes compared in this figure
cont_tabl_R <- muestreo_tidy %>%
  select(tree_health_simplified, reforested) %>%
  filter(tree_health_simplified %in% c("healthy", "ozone", "ozone_and_other"))

# One subset per cell of the 2 x 3 contingency table
# (H = healthy, O = ozone, Oo = ozone and other; R = reforested; y/n = yes/no)

# Healthy
HRy <- filter(cont_tabl_R, tree_health_simplified == "healthy", reforested == "yes")
HRn <- filter(cont_tabl_R, tree_health_simplified == "healthy", reforested == "no")

# Ozone
ORy <- filter(cont_tabl_R, tree_health_simplified == "ozone", reforested == "yes")
ORn <- filter(cont_tabl_R, tree_health_simplified == "ozone", reforested == "no")

# Ozone and others
OoRy <- filter(cont_tabl_R, tree_health_simplified == "ozone_and_other", reforested == "yes")
OoRn <- filter(cont_tabl_R, tree_health_simplified == "ozone_and_other", reforested == "no")

# Named contingency table of counts: rows = reforested (yes/no),
# columns = health status. Original note: pass data matrix to the
# chisq.posthoc.test function.
reforested <- as.table(matrix(
  c(nrow(HRy), nrow(ORy), nrow(OoRy),
    nrow(HRn), nrow(ORn), nrow(OoRn)),
  nrow = 2, byrow = TRUE,
  dimnames = list("reforested" = c("yes", "no"),
                  "health status" = c("healthy", "ozone", "ozone and other"))
))

reforested
##           health status
## reforested healthy ozone ozone and other
##        yes     106    31              64
##        no      383    94             435
# Barplot: number of trees per reforestation class, bars dodged by health status
p_3a <- ggplot(cont_tabl_R,
               aes(reforested, ..count.., fill = tree_health_simplified)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(
    name = "Health status",
    values = c("healthy" = "darkgreen",
               "ozone" = "darkred",
               "ozone_and_other" = "orangered1"),
    labels = c("healthy", "ozone", "ozone and other")
  ) +
  theme_bw() +
  ggtitle("a)") +
  theme(
    legend.title.align = 0.5,
    text = element_text(size = 20),
    plot.title = element_text(lineheight = 1.1, face = "bold")
  ) +
  labs(y = "Number of trees", x = "Reforested")

# Mosaic Plot with vcd library
p_3b <- mosaic(
  reforested,
  shade = TRUE,
  legend = TRUE,
  labeling_args = list(
    rot_labels = c(bottom = 90, top = 0),
    gp_labels = gpar(fontsize = 12)
  )
)

Figure 3c and d: Covered

# Health status and canopy exposure of every tree, restricted to the three
# health classes compared in this figure
cont_tabl_C <- muestreo_tidy %>%
  select(tree_health_simplified, tree_exposition) %>%
  filter(tree_health_simplified %in% c("healthy", "ozone", "ozone_and_other"))

# One subset per cell of the 2 x 3 contingency table
# (H = healthy, O = ozone, Oo = ozone and other; Cy = cover, Cn = exposed)

# Healthy
HCy <- filter(cont_tabl_C, tree_health_simplified == "healthy", tree_exposition == "cover")
HCn <- filter(cont_tabl_C, tree_health_simplified == "healthy", tree_exposition == "exposed")

# Ozone
OCy <- filter(cont_tabl_C, tree_health_simplified == "ozone", tree_exposition == "cover")
OCn <- filter(cont_tabl_C, tree_health_simplified == "ozone", tree_exposition == "exposed")

# Ozone and others
OoCy <- filter(cont_tabl_C, tree_health_simplified == "ozone_and_other", tree_exposition == "cover")
OoCn <- filter(cont_tabl_C, tree_health_simplified == "ozone_and_other", tree_exposition == "exposed")

### Build the contingency table
# Rows = covered (yes = "cover", no = "exposed"), columns = health status.
# Original note: pass data matrix to the chisq.posthoc.test function.
covered <- as.table(matrix(
  c(nrow(HCy), nrow(OCy), nrow(OoCy),
    nrow(HCn), nrow(OCn), nrow(OoCn)),
  nrow = 2, byrow = TRUE,
  dimnames = list("covered" = c("yes", "no"),
                  "health status" = c("healthy", "ozone", "ozone and other"))
))

covered
##        health status
## covered healthy ozone ozone and other
##     yes     322    62             319
##     no      167    63             180
# Barplot: number of trees per exposure class, bars dodged by health status
p_3c <- ggplot(cont_tabl_C,
               aes(tree_exposition, ..count.., fill = tree_health_simplified)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(
    name = "Health status",
    values = c("healthy" = "darkgreen",
               "ozone" = "darkred",
               "ozone_and_other" = "orangered1"),
    labels = c("healthy", "ozone", "ozone and other")
  ) +
  theme_bw() +
  ggtitle("c)") +
  theme(
    legend.title.align = 0.5,
    text = element_text(size = 20),
    plot.title = element_text(lineheight = 1.1, face = "bold")
  ) +
  labs(y = "Number of trees", x = "Tree exposition")

# Mosaic Plot with vcd library
p_3d <- mosaic(
  covered,
  shade = TRUE,
  legend = TRUE,
  labeling_args = list(
    rot_labels = c(bottom = 90, top = 0),
    gp_labels = gpar(fontsize = 12)
  )
)

Multiplot

# Two-column page, filled column by column.
# NOTE(review): p_3b and p_3d hold the values returned by mosaic(), not
# ggplot objects; the output recorded below shows them printed as tables.
# Confirm the mosaics end up in the intended panels.
multiplot(plotlist = list(p_3a, p_3c, p_3b, p_3d), cols = 2)

##            health status healthy ozone ozone and other
## reforested                                            
## yes                          106    31              64
## no                           383    94             435
##         health status healthy ozone ozone and other
## covered                                            
## yes                       322    62             319
## no                        167    63             180

Figure 4

Plot 4a

# Base plot: trees shorter than 15 (tree_heigth) with at least one node,
# using the shared health-status fill palette.
p <- muestreo_tidy %>%
  filter(tree_heigth < 15 & tree_nodes > 0) %>%
  ggplot() +
  theme_bw() +
  scale_fill_manual(
    name = "Health status",
    values = my_cols,
    breaks = desired_order,
    labels = desired_names
  )

# Histogram of tree age (node count), stacked by health status
p4_a <- p +
  geom_histogram(aes(x = tree_nodes, fill = tree_health_simplified)) +
  labs(x = "Tree age (years)", y = "Number of trees") +
  theme(
    text = element_text(size = 20),
    plot.title = element_text(lineheight = 1.1, face = "bold")
  ) +
  ggtitle("a)")
p4_a

Plot 4b

## base data
# Define healthy plants and plants damaged by something other than ozone.
# cond_PO stands for "condition: Percentage of damage by Ozone".
# as_tibble() replaces the deprecated as_data_frame().
cond_PO <- as_tibble(muestreo_tidy)

# Assign 0% ozone damage to the healthy trees
# NOTE(review): this reads `tree_health`, whereas the rest of the document
# uses `tree_health_simplified` - confirm this is the intended column.
cond_PO$ozone_damage_percentage <- ifelse(cond_PO$tree_health == "healthy",
                                          "0%",
                                          cond_PO$ozone_damage_percentage)

# Keep only the rows that fall in one of the six damage classes
condition_PO <- cond_PO %>%
  filter(ozone_damage_percentage %in% desired_order_percentage)

# Order the factor levels by damage severity (not alphabetically) so that
# stacking order, legend order and colours all follow the same sequence
condition_PO$ozone_damage_percentage <- factor(
  condition_PO$ozone_damage_percentage,
  levels = desired_order_percentage
)


# Plot
# Colours are bound to damage classes by name, so the mapping does not depend
# on the order of the factor levels.
p_od <- condition_PO %>%
  filter(!is.na(ozone_damage_percentage)) %>%
  ggplot() +
  scale_fill_manual(values = setNames(my_cols2, desired_order_percentage),
                    breaks = desired_order_percentage,
                    labels = c("0%", "less 10%", "10 to 40%", "40 to 50%",
                               "50 to 70%", "more 70%"),
                    name = "Ozone damage\n per tree") +
  theme_bw() +
  theme(text = element_text(size = 20))

# Number of trees per age (node count), stacked by ozone damage class
p4_b <- p_od +
  geom_bar(aes(x = tree_nodes,
               fill = ozone_damage_percentage)) +
  labs(x = "Tree age (years)", y = "Number of trees") +
  theme(legend.title.align = 0.5,
        plot.title = element_text(lineheight = 1.1, face = "bold")) +
  ggtitle("b)")

p4_b
## Warning: Removed 147 rows containing non-finite values (stat_count).

Plot 4c

# Filter by damage category: keep healthy, ozone, and ozone_and_other trees
condition_HOO <- muestreo_tidy %>%
  filter(tree_health_simplified %in% c("healthy", "ozone", "ozone_and_other"))

condition_HOO$tree_health_simplified <- as.factor(condition_HOO$tree_health_simplified)


# Data distribution

# The data to plot are the number of nodes for each health category.
# The data are discrete, which determines the analysis used below to look
# for differences between the groups.
# The results can be represented as a boxplot.
# `breaks = desired_order` pairs each colour and label with its category
# explicitly (as the other health-status plots do) instead of relying on the
# positional match between the 10-element palette and the 3 levels present.
p4_c <- condition_HOO %>%
  ggplot(aes(y = tree_nodes, x = tree_health_simplified)) +
  geom_boxplot(color = "grey", notch = FALSE) +
  scale_color_manual(values = my_cols,
                     breaks = desired_order,
                     labels = desired_names,
                     name = "Health status") +
  geom_point(position = "jitter", aes(color = tree_health_simplified),
             alpha = 0.5, size = 2.5) +
  xlab("") + ylab("Tree age (years)") +
  theme_bw() +
  ggtitle("c)") +
  # x tick labels are hidden; the colour legend identifies the groups
  theme(text = element_text(size = 20), axis.text.x = element_blank()) +
  theme(plot.title = element_text(lineheight = 1.1, face = "bold"))
p4_c
## Warning: Removed 147 rows containing non-finite values (stat_boxplot).
## Warning: Removed 147 rows containing missing values (geom_point).

# Statistics
# Per-class sample size, mean and standard deviation of the node count
condition_HOO %>%
  group_by(tree_health_simplified) %>%
  summarise(
    count = n(),
    mean = mean(tree_nodes, na.rm = TRUE),
    sd = sd(tree_nodes, na.rm = TRUE)
  )
# Normality is checked on the ANOVA residuals

# ANOVA
a <- aov(sqrt(tree_nodes) ~ tree_health_simplified, data = condition_HOO)
# Summary of the analysis
summary(a)
##                         Df Sum Sq Mean Sq F value Pr(>F)    
## tree_health_simplified   2  111.7   55.86     171 <2e-16 ***
## Residuals              963  314.5    0.33                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 147 observations deleted due to missingness
# We reject the null hypothesis that the means are equal and conclude that
# the mean number of nodes differs between categories.

# Normality: not met when the p-value is below 0.05

# Extract the residuals
aov_residuals <- residuals(a)
# Run Shapiro-Wilk test
shapiro.test(aov_residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  aov_residuals
## W = 0.98232, p-value = 1.98e-09
# Proceed with a Kruskal-Wallis test

# It requires homogeneity of variances
leveneTest(sqrt(tree_nodes) ~ tree_health_simplified,
           data = condition_HOO, center = "median")
kruskal.test(sqrt(tree_nodes) ~ tree_health_simplified,
             data = condition_HOO)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  sqrt(tree_nodes) by tree_health_simplified
## Kruskal-Wallis chi-squared = 265.57, df = 2, p-value < 2.2e-16
# Homogeneity-of-variance test (original author's note: "y sí hay :(";
# the Levene output itself is not recorded in this document)
leveneTest(sqrt(tree_nodes) ~ tree_health_simplified,
           data = condition_HOO, center = "median")
# Post hoc: which groups differ
pairwise.wilcox.test(sqrt(condition_HOO$tree_nodes),
                     condition_HOO$tree_health_simplified,
                     p.adjust.method = "holm")
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  sqrt(condition_HOO$tree_nodes) and condition_HOO$tree_health_simplified 
## 
##                 healthy ozone
## ozone           <2e-16  -    
## ozone_and_other <2e-16  0.012
## 
## P value adjustment method: holm

Plot 4d

# Plot
# Boxplot of tree age (node count) per ozone damage class, with jittered
# points coloured by class.
# The factor is re-levelled by damage severity and the colour scale is given
# explicit breaks and named colours. Without this, alphabetical levels
# ("0%", "10 to 40%", ..., "less than 10%", ...) were paired positionally with
# labels and colours written in severity order, so the legend labelled the
# wrong classes.
p4_d <- condition_PO %>%
  filter(!is.na(ozone_damage_percentage)) %>%
  mutate(ozone_damage_percentage = factor(ozone_damage_percentage,
                                          levels = desired_order_percentage)) %>%
  ggplot(aes(y = tree_nodes, x = ozone_damage_percentage)) +
  geom_boxplot(color = "grey", notch = FALSE) +
  scale_color_manual(values = setNames(my_cols2, desired_order_percentage),
                     breaks = desired_order_percentage,
                     labels = c("0%", "less 10%", "10 to 40%", "40 to 50%",
                                "50 to 70%", "more 70%")) +
  geom_point(position = "jitter", aes(color = ozone_damage_percentage),
             alpha = 0.5, size = 2.5) +
  xlab("") + ylab("Tree age (years)") +
  labs(color = "Ozone damage\n per tree") +
  theme_bw() +
  ggtitle("d)") +
  theme(legend.title.align = 0.5) +
  # x tick labels are hidden; the colour legend identifies the classes
  theme(text = element_text(size = 20), axis.text.x = element_blank()) +
  theme(plot.title = element_text(lineheight = 1.1, face = "bold"))
p4_d
## Warning: Removed 147 rows containing non-finite values (stat_boxplot).
## Warning: Removed 147 rows containing missing values (geom_point).

# Statistical tests
# NOTE(review): these tests use `condition_HOO`, while plot d) is drawn from
# `condition_PO`. In `condition_HOO` the healthy trees were not assigned "0%",
# so that class is absent from the output below (Df = 4, five classes).
# Confirm this is intended.
# ANOVA
a <- aov(sqrt(tree_nodes) ~ ozone_damage_percentage, data = condition_HOO)
# Summary of the analysis
summary(a)
##                          Df Sum Sq Mean Sq F value  Pr(>F)    
## ozone_damage_percentage   4  10.49   2.623   8.022 2.8e-06 ***
## Residuals               516 168.73   0.327                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 592 observations deleted due to missingness
# We reject the null hypothesis that the means are equal and conclude that
# the mean number of nodes differs between categories.

# Normality: not met when the p-value is below 0.05
# Extract the residuals
aov_residuals <- residuals(a)
# Run Shapiro-Wilk test
shapiro.test(aov_residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  aov_residuals
## W = 0.99127, p-value = 0.003627
# Proceed with a Kruskal-Wallis test
# It requires homogeneity of variances. A p-value above 0.05 means there is no
# evidence against homogeneity of variances.
leveneTest(sqrt(tree_nodes) ~ ozone_damage_percentage,
           data = condition_HOO, center = "median")
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
kruskal.test(sqrt(tree_nodes) ~ ozone_damage_percentage,
             data = condition_HOO)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  sqrt(tree_nodes) by ozone_damage_percentage
## Kruskal-Wallis chi-squared = 29.898, df = 4, p-value = 5.133e-06
# Post hoc: which groups differ
pairwise.wilcox.test(sqrt(condition_HOO$tree_nodes),
                     condition_HOO$ozone_damage_percentage,
                     p.adjust.method = "bonferroni")
## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  sqrt(condition_HOO$tree_nodes) and condition_HOO$ozone_damage_percentage 
## 
##               10 to 40% 40 to 50% 50 to 70% less than 10%
## 40 to 50%     1.0000    -         -         -            
## 50 to 70%     0.0018    0.3516    -         -            
## less than 10% 0.4332    0.1390    1.9e-06   -            
## more than 70% 1.0000    1.0000    0.1373    1.0000       
## 
## P value adjustment method: bonferroni

Multiplot

# Two-column page, filled column by column: a) and c) on the left,
# b) and d) on the right.
multiplot(plotlist = list(p4_a, p4_c, p4_b, p4_d), cols = 2)
## Warning: Removed 147 rows containing non-finite values (stat_boxplot).
## Warning: Removed 147 rows containing missing values (geom_point).
## Warning: Removed 147 rows containing non-finite values (stat_count).
## Warning: Removed 147 rows containing non-finite values (stat_boxplot).
## Warning: Removed 147 rows containing missing values (geom_point).

Figure 5

# Filter by damage category (healthy, ozone, ozone_and_other) and store the
# health status as a factor
condition_HOO <- muestreo_tidy %>%
  filter(tree_health_simplified %in% c("healthy", "ozone", "ozone_and_other")) %>%
  mutate(tree_health_simplified = as.factor(tree_health_simplified))

# Model 3 - age, health and spatial structure affect growth
glm3 <- glm(
  log10(tree_heigth) ~ tree_nodes*tree_health_simplified + tree_exposition + reforested,
  data = condition_HOO
)

# H0: the residuals are normal. A p-value above 0.05 means H0 is not rejected,
# i.e. the residuals can be treated as normal.
shapiro.test(glm3$residuals) # Normality (normal if p-value > 0.05)
## 
##  Shapiro-Wilk normality test
## 
## data:  glm3$residuals
## W = 0.99778, p-value = 0.2238
# Homoscedasticity: a non-significant correlation between absolute residuals
# and fitted values means homoscedasticity holds
cor.test(abs(glm3$residuals), glm3$fitted.values)
## 
##  Pearson's product-moment correlation
## 
## data:  abs(glm3$residuals) and glm3$fitted.values
## t = 1.4722, df = 964, p-value = 0.1413
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.0157601  0.1101083
## sample estimates:
##        cor 
## 0.04736209
# Diagnostic plots of the model on a 2 x 2 panel
par(mfrow = c(2, 2))
plot(glm3)

summary(glm3)
## 
## Call:
## glm(formula = log10(tree_heigth) ~ tree_nodes * tree_health_simplified + 
##     tree_exposition + reforested, data = condition_HOO)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -0.94431  -0.19691   0.01762   0.18800   0.84646  
## 
## Coefficients:
##                                                   Estimate Std. Error t value
## (Intercept)                                      -0.694392   0.033429 -20.772
## tree_nodes                                        0.108988   0.004806  22.677
## tree_health_simplifiedozone                       0.218104   0.096575   2.258
## tree_health_simplifiedozone_and_other             0.426346   0.053116   8.027
## tree_expositionexposed                            0.069066   0.020785   3.323
## reforestedyes                                     0.032913   0.024310   1.354
## tree_nodes:tree_health_simplifiedozone           -0.013930   0.010673  -1.305
## tree_nodes:tree_health_simplifiedozone_and_other -0.037654   0.006213  -6.060
##                                                  Pr(>|t|)    
## (Intercept)                                       < 2e-16 ***
## tree_nodes                                        < 2e-16 ***
## tree_health_simplifiedozone                      0.024145 *  
## tree_health_simplifiedozone_and_other            2.92e-15 ***
## tree_expositionexposed                           0.000925 ***
## reforestedyes                                    0.176097    
## tree_nodes:tree_health_simplifiedozone           0.192146    
## tree_nodes:tree_health_simplifiedozone_and_other 1.95e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.08797878)
## 
##     Null deviance: 224.146  on 965  degrees of freedom
## Residual deviance:  84.284  on 958  degrees of freedom
##   (147 observations deleted due to missingness)
## AIC: 403.34
## 
## Number of Fisher Scoring iterations: 2
# Plot model 3: tree height against age, one fitted line per health status.
# NOTE(review): the y axis uses the natural log, log(), whereas glm3 is fitted
# on log10(tree_heigth) - confirm which scale is intended for the figure.
#
# theme_bw() is a complete theme and replaces every theme setting added before
# it, so it must come BEFORE the theme() tweaks. It used to be last, which
# silently discarded the font size and title settings. The former
# `axis.text.x = element_blank()` is not carried over: it never took effect,
# and the age axis needs its tick labels.
Tree_height_plot <- ggplot(condition_HOO,
                           aes(x = tree_nodes, y = log(tree_heigth))) +
  geom_point(aes(colour = tree_health_simplified), alpha = 0.5, size = 2.5) +
  geom_smooth(method = "glm", aes(color = tree_health_simplified),
              fullrange = TRUE) +
  labs(y = "log(Tree height)", x = "Tree age (years)",
       color = "Health status") +
  # explicit breaks pair each colour/label with its category, as in the other
  # health-status plots
  scale_color_manual(values = my_cols,
                     breaks = desired_order,
                     labels = desired_names,
                     name = "Health status") +
  theme_bw() +
  theme(text = element_text(size = 20),
        plot.title = element_text(lineheight = 1.1, face = "bold"))

Tree_height_plot
## Warning: Removed 147 rows containing non-finite values (stat_smooth).
## Warning: Removed 147 rows containing missing values (geom_point).

Figure S2

# plot pies in map
# One pie per sampled plot at its coordinates; slices are the per-category
# tree counts (columns named in `desired_order`). Uses whichever `sat_map`
# was fetched last.
p_satmap <- ggmap(sat_map)
p_satmap +
  geom_scatterpie(
    mapping = aes(
      x = X_coordinates_longitude,
      y = X_coordinates_latitude,
      group = plot
    ),
    data = parcelas_tidy,
    cols = desired_order,
    pie_scale = 1.5,
    color = NA,
    alpha = 1
  ) +
  scale_fill_manual(
    name = "Health status",
    values = my_cols,
    breaks = desired_order,
    labels = desired_names
  ) +
  theme(text = element_text(size = 20))

Figure S3

# Create new variables per plot: total number of trees, and the share of trees
# with ozone damage (ozone + ozone_and_other) over that total.
# rowwise() makes sum() add across the columns of each row. The result stays a
# row-wise data frame, as before.
# NOTE(review): perc.ozone is a fraction (no multiplication by 100) although
# the axis label below says "Percentage".
parcelas_tidy <- parcelas_tidy %>%
  rowwise() %>%
  mutate(
    total = sum(healthy, ozone, ozone_and_other,
                drougth, acid_rain, other,
                others_combined, dead, fungi,
                # insect,
                worm),
    perc.ozone = sum(ozone, ozone_and_other) / total
  )

# plot
# (an earlier `p` built here was overwritten immediately and has been removed)
p <- ggplot(parcelas_tidy, aes(X_coordinates_altitude, perc.ozone)) +
  geom_point(color = "grey50", size = 3, alpha = 0.6)

# Scatter plus linear fit, annotated with the fitted equation and adjusted R2
p +
  stat_smooth(color = "skyblue", formula = y ~ x, fill = "skyblue",
              method = "lm") +
  stat_poly_eq(
    aes(label = paste(..eq.label.., ..adj.rr.label.., sep = '~~~~')),
    formula = y ~ x, parse = TRUE,
    size = 10,      # font size of the equation
    label.x = 0.1,  # label position, as a proportion between 0 and 1
    label.y = 0.95) +
  labs(x = "Plot altitude", y = "Percentage of ozone damaged trees") +
  theme_bw() +
  theme(plot.title = element_text(lineheight = 1.1, face = "bold")) +
  theme(text = element_text(size = 20))

# Figura S4